library(slendr)
## The interface to all required Python modules has been activated.
library(ggtree)
## ggtree v3.2.1 For help: https://yulab-smu.top/treedata-book/
##
## If you use ggtree in published research, please cite the most appropriate paper(s):
##
## 1. Guangchuang Yu. Using ggtree to visualize data on tree-like structures. Current Protocols in Bioinformatics. 2020, 69:e96. doi:10.1002/cpbi.96
## 2. Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution. 2018, 35(12):3041-3043. doi:10.1093/molbev/msy194
## 3. Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
##
## Attaching package: 'ggtree'
## The following object is masked from 'package:slendr':
##
## expand
library(adegenet)
## Loading required package: ade4
##
## /// adegenet 2.1.5 is loaded ////////////
##
## > overview: '?adegenet'
## > tutorials/doc/questions: 'adegenetWeb()'
## > bug reports/feature requests: adegenetIssues()
library(StAMPP)
## Loading required package: pegas
## Loading required package: ape
##
## Attaching package: 'ape'
## The following object is masked from 'package:ggtree':
##
## rotate
## Registered S3 method overwritten by 'pegas':
## method from
## print.amova ade4
##
## Attaching package: 'pegas'
## The following object is masked from 'package:ape':
##
## mst
## The following object is masked from 'package:ade4':
##
## amova
library(vcfR)
##
## ***** *** vcfR *** *****
## This is vcfR 1.12.0
## browseVignettes('vcfR') # Documentation
## citation('vcfR') # Citation
## ***** ***** ***** *****
##
## Attaching package: 'vcfR'
## The following objects are masked from 'package:pegas':
##
## getINFO, write.vcf
library(ggplot2)
#read in vcf
vcfR <- read.vcfR("~/Desktop/mex.towhees/towhee.75.mac2.unlinked.nomito.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 4800
## header_line: 4801
## variant count: 2668
## column count: 41
##
Meta line 1000 read in.
Meta line 2000 read in.
Meta line 3000 read in.
Meta line 4000 read in.
Meta line 4800 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 2668
## Character matrix gt cols: 41
## skip: 0
## nrows: 2668
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant: 2668
## All variants processed
#read in sample info csv
sample.info<-read.csv("~/Desktop/mex.towhees/sample.locs.csv")
#reorder sampling file to match order of samples in vcf
sample.info<-sample.info[match(colnames(vcfR@gt)[-1], sample.info$ID),]
sample.info$ID == colnames(vcfR@gt)[-1]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
sample.info$pop<-c("mac","mac",rep("hyb", times=4),rep("ocai", times=4),rep("mac", times=10),
rep("hyb", times=4),rep("mac", times=4),rep("ocai", times=4))
#convert to genlight
gen<-vcfR2genlight(vcfR)
#
pop(gen)<-sample.info$pop
sample.div <- stamppNeisD(gen, pop = FALSE)
plot(nj(sample.div), type = "unrooted", cex = 1)
fst<-stamppFst(gen)
fst$Fsts
## mac hyb ocai
## mac NA NA NA
## hyb 0.04580035 NA NA
## ocai 0.07879940 0.03361103 NA
#calculate heterozygosity per sample
mat<-extract.gt(vcfR)
sample.info$het<-sample.info$sample.loc
#write loop to calculate heterozygosity
for (i in 1:ncol(mat)){
sample.info$het[i]<-sum(mat[,i][!is.na(mat[,i])] == "0/1")/sum(!is.na(mat[,i]))
}
sample.info$het<-as.numeric(as.character(sample.info$het))
#plot heterozygosity violin plots
ggplot(sample.info, aes(x=pop, y=het)) +
#geom_violin(trim = FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = 2, alpha=.75,aes(fill=sample.loc))+
theme_classic()+
labs(x="sampling locality",y="heterozygosity")
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
### setup first model
#define each population
mac <- population("mac", time = 1, N = 10000)
ocai <- population("ocai", time = 50, N = 10000, parent = mac)
hyb <- population("hyb", time = 800, N = 10000, parent = ocai)
#add in the gene flow edge
gf <- gene_flow(from = ocai, to = hyb, start = 801, end = 1000, 0.75)
gf2 <- gene_flow(from = mac, to = hyb, start = 801, end = 1000, 0.75)
gf3 <- gene_flow(from = hyb, to = mac, start = 801, end = 1000, 0.75)
gf4 <- gene_flow(from = hyb, to = ocai, start = 801, end = 1000, 0.75)
#compile the model
model <- compile_model(populations = list(mac,ocai,hyb),
gene_flow = list(gf,gf2,gf3,gf4),
generation_time = 1,
sim_length = 1000,
path = "~/Downloads/slim.examples/tow",
overwrite = TRUE, force = TRUE)
#plot the model to make sure you set it up correctly
plot_model(model, sizes = TRUE, proportions = TRUE)
#schedule sampling
present_samples <- schedule_sampling(model, times = 1000, list(mac, 16), list(hyb, 8), list(ocai, 8), strict = TRUE)
present_samples
## # A tibble: 3 × 7
## time pop n y_orig x_orig y x
## <int> <chr> <int> <lgl> <lgl> <lgl> <lgl>
## 1 1000 mac 16 NA NA NA NA
## 2 1000 hyb 8 NA NA NA NA
## 3 1000 ocai 8 NA NA NA NA
#sim the model in msprime
msprime(model, sequence_length = 1e7, recombination_rate = 1e-8,
sampling = present_samples, random_seed = 314)
#check out resulting trees
ts_file <- file.path(model$path, "output_msprime.trees")
file.exists(ts_file)
## [1] TRUE
#load in tree sequence
ts <- ts_load(model)
ts
## ╔═══════════════════════════╗
## ║TreeSequence ║
## ╠═══════════════╤═══════════╣
## ║Trees │ 18062║
## ╟───────────────┼───────────╢
## ║Sequence Length│ 10000000║
## ╟───────────────┼───────────╢
## ║Time Units │generations║
## ╟───────────────┼───────────╢
## ║Sample Nodes │ 64║
## ╟───────────────┼───────────╢
## ║Total Size │ 2.7 MiB║
## ╚═══════════════╧═══════════╝
## ╔═══════════╤═════╤═════════╤════════════╗
## ║Table │Rows │Size │Has Metadata║
## ╠═══════════╪═════╪═════════╪════════════╣
## ║Edges │63884│ 1.9 MiB│ No║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Individuals│ 32│920 Bytes│ No║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Migrations │ 0│ 8 Bytes│ No║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Mutations │ 0│ 16 Bytes│ No║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Nodes │11287│308.6 KiB│ No║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Populations│ 3│301 Bytes│ Yes║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Provenances│ 1│ 2.7 KiB│ No║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Sites │ 0│ 16 Bytes│ No║
## ╚═══════════╧═════╧═════════╧════════════╝
ts_coalesced(ts) #confirm coalescence
## [1] TRUE
#add mutations to simulation
ts <- ts_mutate(ts, mutation_rate = 1e-8, random_seed = 3141)
ts
## ╔═══════════════════════════╗
## ║TreeSequence ║
## ╠═══════════════╤═══════════╣
## ║Trees │ 18062║
## ╟───────────────┼───────────╢
## ║Sequence Length│ 10000000║
## ╟───────────────┼───────────╢
## ║Time Units │generations║
## ╟───────────────┼───────────╢
## ║Sample Nodes │ 64║
## ╟───────────────┼───────────╢
## ║Total Size │ 4.0 MiB║
## ╚═══════════════╧═══════════╝
## ╔═══════════╤═════╤═════════╤════════════╗
## ║Table │Rows │Size │Has Metadata║
## ╠═══════════╪═════╪═════════╪════════════╣
## ║Edges │63884│ 1.9 MiB│ No║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Individuals│ 32│920 Bytes│ No║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Migrations │ 0│ 8 Bytes│ No║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Mutations │20784│751.0 KiB│ No║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Nodes │11287│308.6 KiB│ No║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Populations│ 3│301 Bytes│ Yes║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Provenances│ 2│ 3.5 KiB│ No║
## ╟───────────┼─────┼─────────┼────────────╢
## ║Sites │20755│506.7 KiB│ No║
## ╚═══════════╧═════╧═════════╧════════════╝
# extract a tree in the tree sequence
tree <- ts_phylo(ts, 280)
## Starting checking the validity of tree...
## Found number of tips: n = 64
## Found number of nodes: m = 63
## Done.
tree
##
## Phylogenetic tree with 64 tips and 63 internal nodes.
##
## Tip labels:
## 63 (ocai_8), 62 (ocai_8), 61 (ocai_7), 60 (ocai_7), 59 (ocai_6), 58 (ocai_6), ...
## Node labels:
## 8960, 65, 66, 113, 205, 227, ...
##
## Rooted; includes branch lengths.
ggtree(tree) +
geom_point2(aes(subset = !isTip)) + # points for internal nodes
geom_tiplab() + # sample labels for tips
hexpand(0.1) # make more space for the tip labels
#write out vcf
ts_vcf(ts, path = "~/Downloads/slim.examples/output.vcf.gz")
#read in vcf
vcfR <- read.vcfR("~/Downloads/slim.examples/output.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 5
## header_line: 6
## variant count: 20755
## column count: 41
##
Meta line 5 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 20755
## Character matrix gt cols: 41
## skip: 0
## nrows: 20755
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant: 20755
## All variants processed
#convert to genlight
gen<-vcfR2genlight(vcfR)
## Warning in vcfR2genlight(vcfR): Found 23 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 23 loci will be omitted from the genlight object.
#
pop(gen)<-gen@ind.names
sample.div <- stamppNeisD(gen, pop = FALSE)
plot(nj(sample.div), type = "unrooted", cex = 1)
sample.sets<-list(mac = c("mac_1","mac_2","mac_3","mac_4","mac_5","mac_6","mac_7","mac_8"),
ocai= c("ocai_1","ocai_2","ocai_3","ocai_4"),
hyb = c("hyb_1","hyb_2","hyb_3","hyb_4"))
ts_fst(ts, sample.sets)
## # A tibble: 3 × 3
## x y Fst
## <chr> <chr> <dbl>
## 1 mac ocai -0.000609
## 2 mac hyb 0.00419
## 3 ocai hyb 0.0104
#calculate heterozygosity per sample
mat<-extract.gt(vcfR)
sample.info$sim.het<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
sample.info$sim.pop<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
#write loop to calculate heterozygosity
for (i in 1:ncol(mat)){
sample.info$sim.het[i]<-(sum(mat[,i][!is.na(mat[,i])] == "0|1")+sum(mat[,i][!is.na(mat[,i])] == "1|0"))/sum(!is.na(mat[,i]))
}
sample.info$sim.het<-as.numeric(as.character(sample.info$sim.het))
#plot heterozygosity violin plots
ggplot(sample.info, aes(x=sim.pop, y=sim.het)) +
#geom_violin(trim = FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = 2, alpha=.75,aes(fill=sample.loc))+
theme_classic()+
labs(x="sampling locality",y="heterozygosity")
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
#define each population
mac <- population("mac", time = 1, N = 10000)
ocai <- population("ocai", time = 50, N = 10000, parent = mac)
hyb <- population("hyb", time = 4000, N = 10000, parent = ocai)
#add in the gene flow edge
#add in the gene flow edge
gf <- gene_flow(from = ocai, to = hyb, start = 4001, end = 5000, 0.75)
gf2 <- gene_flow(from = mac, to = hyb, start = 4001, end = 5000, 0.75)
gf3 <- gene_flow(from = hyb, to = mac, start = 4001, end = 5000, 0.75)
gf4 <- gene_flow(from = hyb, to = ocai, start = 4001, end = 5000, 0.75)
#compile the model
model <- compile_model(populations = list(mac,ocai,hyb),
gene_flow = list(gf,gf2,gf3,gf4),
generation_time = 1,
sim_length = 5000,
path = "~/Downloads/slim.examples/tow2",
overwrite = TRUE, force = TRUE)
#plot the model to make sure you set it up correctly
plot_model(model, sizes = TRUE, proportions = TRUE)
#schedule sampling
present_samples <- schedule_sampling(model, times = 5000, list(mac, 16), list(hyb, 8), list(ocai, 8), strict = TRUE)
present_samples
## # A tibble: 3 × 7
## time pop n y_orig x_orig y x
## <int> <chr> <int> <lgl> <lgl> <lgl> <lgl>
## 1 5000 mac 16 NA NA NA NA
## 2 5000 hyb 8 NA NA NA NA
## 3 5000 ocai 8 NA NA NA NA
#sim the model in msprime
msprime(model, sequence_length = 1e7, recombination_rate = 1e-8,
sampling = present_samples, random_seed = 315)
#load in tree sequence
ts <- ts_load(model)
#add mutations to simulation
ts <- ts_mutate(ts, mutation_rate = 1e-8, random_seed = 3141)
#write out vcf
ts_vcf(ts, path = "~/Downloads/slim.examples/output.2.vcf.gz")
#read in vcf
vcfR <- read.vcfR("~/Downloads/slim.examples/output.2.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 5
## header_line: 6
## variant count: 26646
## column count: 41
##
Meta line 5 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 26646
## Character matrix gt cols: 41
## skip: 0
## nrows: 26646
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant: 26646
## All variants processed
#convert to genlight
gen<-vcfR2genlight(vcfR)
## Warning in vcfR2genlight(vcfR): Found 31 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 31 loci will be omitted from the genlight object.
#
pop(gen)<-gen@ind.names
sample.div <- stamppNeisD(gen, pop = FALSE)
plot(nj(sample.div), type = "unrooted", cex = 1)
ts_fst(ts, sample.sets)
## # A tibble: 3 × 3
## x y Fst
## <chr> <chr> <dbl>
## 1 mac ocai 0.0214
## 2 mac hyb 0.0166
## 3 ocai hyb 0.0121
#calculate heterozygosity per sample
mat<-extract.gt(vcfR)
sample.info$sim.het<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
sample.info$sim.pop<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
#write loop to calculate heterozygosity
for (i in 1:ncol(mat)){
sample.info$sim.het[i]<-(sum(mat[,i][!is.na(mat[,i])] == "0|1")+sum(mat[,i][!is.na(mat[,i])] == "1|0"))/sum(!is.na(mat[,i]))
}
sample.info$sim.het<-as.numeric(as.character(sample.info$sim.het))
#plot heterozygosity violin plots
ggplot(sample.info, aes(x=sim.pop, y=sim.het)) +
#geom_violin(trim = FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = 2, alpha=.75,aes(fill=sample.loc))+
theme_classic()+
labs(x="sampling locality",y="heterozygosity")
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
#define each population
mac <- population("mac", time = 1, N = 10000)
ocai <- population("ocai", time = 50, N = 10000, parent = mac)
hyb <- population("hyb", time = 4000, N = 10000, parent = ocai)
#add in the gene flow edge
#add in the gene flow edge
gf <- gene_flow(from = ocai, to = hyb, start = 4001, end = 5000, 0.5)
gf2 <- gene_flow(from = mac, to = hyb, start = 4001, end = 5000, 0.5)
gf3 <- gene_flow(from = hyb, to = mac, start = 4001, end = 5000, 0.5)
gf4 <- gene_flow(from = hyb, to = ocai, start =4001, end = 5000, 0.5)
#compile the model
model <- compile_model(populations = list(mac,ocai,hyb),
gene_flow = list(gf,gf2,gf3,gf4),
generation_time = 1,
sim_length = 5000,
path = "~/Downloads/slim.examples/tow3",
overwrite = TRUE, force = TRUE)
#plot the model to make sure you set it up correctly
plot_model(model, sizes = TRUE, proportions = TRUE)
#sim the model in msprime
msprime(model, sequence_length = 1e7, recombination_rate = 1e-8,
sampling = present_samples, random_seed = 315)
#load in tree sequence
ts <- ts_load(model)
#add mutations to simulation
ts <- ts_mutate(ts, mutation_rate = 1e-8, random_seed = 3141)
#write out vcf
ts_vcf(ts, path = "~/Downloads/slim.examples/output.3.vcf.gz")
#read in vcf
vcfR <- read.vcfR("~/Downloads/slim.examples/output.3.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 5
## header_line: 6
## variant count: 26372
## column count: 41
##
Meta line 5 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 26372
## Character matrix gt cols: 41
## skip: 0
## nrows: 26372
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant: 26372
## All variants processed
#convert to genlight
gen<-vcfR2genlight(vcfR)
## Warning in vcfR2genlight(vcfR): Found 25 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 25 loci will be omitted from the genlight object.
#
pop(gen)<-gen@ind.names
sample.div <- stamppNeisD(gen, pop = FALSE)
plot(nj(sample.div), type = "unrooted", cex = 1)
ts_fst(ts, sample.sets)
## # A tibble: 3 × 3
## x y Fst
## <chr> <chr> <dbl>
## 1 mac ocai 0.0510
## 2 mac hyb 0.0359
## 3 ocai hyb 0.0157
#calculate heterozygosity per sample
mat<-extract.gt(vcfR)
sample.info$sim.het<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
sample.info$sim.pop<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
#write loop to calculate heterozygosity
for (i in 1:ncol(mat)){
sample.info$sim.het[i]<-(sum(mat[,i][!is.na(mat[,i])] == "0|1")+sum(mat[,i][!is.na(mat[,i])] == "1|0"))/sum(!is.na(mat[,i]))
}
sample.info$sim.het<-as.numeric(as.character(sample.info$sim.het))
#plot heterozygosity violin plots
ggplot(sample.info, aes(x=sim.pop, y=sim.het)) +
#geom_violin(trim = FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = 2, alpha=.75,aes(fill=sample.loc))+
theme_classic()+
labs(x="sampling locality",y="heterozygosity")
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
#define each population
mac <- population("mac", time = 1, N = 10000)
ocai <- population("ocai", time = 50, N = 10000, parent = mac)
hyb <- population("hyb", time = 3500, N = 10000, parent = ocai)
#add in the gene flow edge
#add in the gene flow edge
gf <- gene_flow(from = ocai, to = hyb, start = 3501, end = 4500, 0.75)
gf2 <- gene_flow(from = mac, to = hyb, start = 3501, end = 4500, 0.75)
gf3 <- gene_flow(from = hyb, to = mac, start = 3501, end = 4500, 0.75)
gf4 <- gene_flow(from = hyb, to = ocai, start =3501, end = 4500, 0.75)
#compile the model
model <- compile_model(populations = list(mac,ocai,hyb),
gene_flow = list(gf,gf2,gf3,gf4),
generation_time = 1,
sim_length = 5000,
path = "~/Downloads/slim.examples/tow4",
overwrite = TRUE, force = TRUE)
#plot the model to make sure you set it up correctly
plot_model(model, sizes = TRUE, proportions = TRUE)
#sim the model in msprime
msprime(model, sequence_length = 1e7, recombination_rate = 1e-8,
sampling = present_samples, random_seed = 315)
#load in tree sequence
ts <- ts_load(model)
#add mutations to simulation
ts <- ts_mutate(ts, mutation_rate = 1e-8, random_seed = 3141)
#write out vcf
ts_vcf(ts, path = "~/Downloads/slim.examples/output.4.vcf.gz")
#read in vcf
vcfR <- read.vcfR("~/Downloads/slim.examples/output.4.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 5
## header_line: 6
## variant count: 26391
## column count: 41
##
Meta line 5 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 26391
## Character matrix gt cols: 41
## skip: 0
## nrows: 26391
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant: 26391
## All variants processed
#convert to genlight
gen<-vcfR2genlight(vcfR)
## Warning in vcfR2genlight(vcfR): Found 24 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 24 loci will be omitted from the genlight object.
#
pop(gen)<-gen@ind.names
sample.div <- stamppNeisD(gen, pop = FALSE)
plot(nj(sample.div), type = "unrooted", cex = 1)
ts_fst(ts, sample.sets)
## # A tibble: 3 × 3
## x y Fst
## <chr> <chr> <dbl>
## 1 mac ocai 0.0425
## 2 mac hyb 0.0223
## 3 ocai hyb 0.0268
#calculate heterozygosity per sample
mat<-extract.gt(vcfR)
sample.info$sim.het<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
sample.info$sim.pop<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
#write loop to calculate heterozygosity
for (i in 1:ncol(mat)){
sample.info$sim.het[i]<-(sum(mat[,i][!is.na(mat[,i])] == "0|1")+sum(mat[,i][!is.na(mat[,i])] == "1|0"))/sum(!is.na(mat[,i]))
}
sample.info$sim.het<-as.numeric(as.character(sample.info$sim.het))
#plot heterozygosity violin plots
ggplot(sample.info, aes(x=sim.pop, y=sim.het)) +
#geom_violin(trim = FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = 2, alpha=.75,aes(fill=sample.loc))+
theme_classic()+
labs(x="sampling locality",y="heterozygosity")
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
#define each population
mac <- population("mac", time = 1, N = 10000)
ocai <- population("ocai", time = 50, N = 10000, parent = mac)
hyb <- population("hyb", time = 10000, N = 10000, parent = ocai)
#add in the gene flow edge
#add in the gene flow edge
gf <- gene_flow(from = ocai, to = hyb, start = 10001, end = 15000, 0.5)
gf2 <- gene_flow(from = mac, to = hyb, start = 10001, end = 15000, 0.5)
gf3 <- gene_flow(from = hyb, to = mac, start = 10001, end = 15000, 0.5)
gf4 <- gene_flow(from = hyb, to = ocai, start =10001, end = 15000, 0.5)
#compile the model
model <- compile_model(populations = list(mac,ocai,hyb),
gene_flow = list(gf,gf2,gf3,gf4),
generation_time = 1,
sim_length = 20000,
path = "~/Downloads/slim.examples/tow5",
overwrite = TRUE, force = TRUE)
#plot the model to make sure you set it up correctly
plot_model(model, sizes = TRUE, proportions = TRUE)
#schedule sampling
present_samples <- schedule_sampling(model, times = 20000, list(mac, 16), list(hyb, 8), list(ocai, 8), strict = TRUE)
present_samples
## # A tibble: 3 × 7
## time pop n y_orig x_orig y x
## <int> <chr> <int> <lgl> <lgl> <lgl> <lgl>
## 1 20000 mac 16 NA NA NA NA
## 2 20000 hyb 8 NA NA NA NA
## 3 20000 ocai 8 NA NA NA NA
#sim the model in msprime
msprime(model, sequence_length = 1e7, recombination_rate = 1e-8,
sampling = present_samples, random_seed = 315)
#load in tree sequence
ts <- ts_load(model)
#add mutations to simulation
ts <- ts_mutate(ts, mutation_rate = 1e-8, random_seed = 3141)
#write out vcf
ts_vcf(ts, path = "~/Downloads/slim.examples/output.5.vcf.gz")
#read in vcf
vcfR <- read.vcfR("~/Downloads/slim.examples/output.5.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 5
## header_line: 6
## variant count: 37447
## column count: 41
##
Meta line 5 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 37447
## Character matrix gt cols: 41
## skip: 0
## nrows: 37447
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant: 37447
## All variants processed
#convert to genlight
gen<-vcfR2genlight(vcfR)
## Warning in vcfR2genlight(vcfR): Found 41 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 41 loci will be omitted from the genlight object.
#
pop(gen)<-gen@ind.names
sample.div <- stamppNeisD(gen, pop = FALSE)
plot(nj(sample.div), type = "unrooted", cex = 1)
ts_fst(ts, sample.sets)
## # A tibble: 3 × 3
## x y Fst
## <chr> <chr> <dbl>
## 1 mac ocai 0.232
## 2 mac hyb 0.181
## 3 ocai hyb 0.157
#calculate heterozygosity per sample
mat<-extract.gt(vcfR)
sample.info$sim.het<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
sample.info$sim.pop<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
#write loop to calculate heterozygosity
for (i in 1:ncol(mat)){
sample.info$sim.het[i]<-(sum(mat[,i][!is.na(mat[,i])] == "0|1")+sum(mat[,i][!is.na(mat[,i])] == "1|0"))/sum(!is.na(mat[,i]))
}
sample.info$sim.het<-as.numeric(as.character(sample.info$sim.het))
#plot heterozygosity violin plots
ggplot(sample.info, aes(x=sim.pop, y=sim.het)) +
#geom_violin(trim = FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = 2, alpha=.75,aes(fill=sample.loc))+
theme_classic()+
labs(x="sampling locality",y="heterozygosity")
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
#define each population
mac <- population("mac", time = 1, N = 25000)
ocai <- population("ocai", time = 50, N = 25000, parent = mac)
hyb <- population("hyb", time = 10000, N = 25000, parent = ocai)
#add in the gene flow edge
#add in the gene flow edge
gf <- gene_flow(from = ocai, to = hyb, start = 10001, end = 15000, 0.8)
gf2 <- gene_flow(from = mac, to = hyb, start = 10001, end = 15000, 0.8)
gf3 <- gene_flow(from = hyb, to = mac, start = 10001, end = 15000, 0.8)
gf4 <- gene_flow(from = hyb, to = ocai, start =10001, end = 15000, 0.8)
gf.x <- gene_flow(from = ocai, to = hyb, start = 15001, end = 20000, 0.2)
gf2.x <- gene_flow(from = mac, to = hyb, start = 15001, end = 20000, 0.2)
gf3.x <- gene_flow(from = hyb, to = mac, start = 15001, end = 20000, 0.2)
gf4.x <- gene_flow(from = hyb, to = ocai, start =15001, end = 20000, 0.2)
#compile the model
model <- compile_model(populations = list(mac,ocai,hyb),
gene_flow = list(gf,gf2,gf3,gf4,gf.x,gf2.x,gf3.x,gf4.x),
generation_time = 1,
sim_length = 20000,
path = "~/Downloads/slim.examples/tow6",
overwrite = TRUE, force = TRUE)
#plot the model to make sure you set it up correctly
plot_model(model, sizes = TRUE, proportions = TRUE)
#schedule sampling
present_samples <- schedule_sampling(model, times = 20000, list(mac, 16), list(hyb, 8), list(ocai, 8), strict = TRUE)
present_samples
## # A tibble: 3 × 7
## time pop n y_orig x_orig y x
## <int> <chr> <int> <lgl> <lgl> <lgl> <lgl>
## 1 20000 mac 16 NA NA NA NA
## 2 20000 hyb 8 NA NA NA NA
## 3 20000 ocai 8 NA NA NA NA
#sim the model in msprime
msprime(model, sequence_length = 1e7, recombination_rate = 1e-8,
sampling = present_samples, random_seed = 315)
#load in tree sequence
ts <- ts_load(model)
#add mutations to simulation
ts <- ts_mutate(ts, mutation_rate = 1e-8, random_seed = 3141)
#write out vcf
ts_vcf(ts, path = "~/Downloads/slim.examples/output.6.vcf.gz")
#read in vcf
vcfR <- read.vcfR("~/Downloads/slim.examples/output.6.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 5
## header_line: 6
## variant count: 75885
## column count: 41
##
Meta line 5 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 75885
## Character matrix gt cols: 41
## skip: 0
## nrows: 75885
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant: 75885
## All variants processed
#convert to genlight
gen<-vcfR2genlight(vcfR)
## Warning in vcfR2genlight(vcfR): Found 198 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 198 loci will be omitted from the genlight object.
#
pop(gen)<-gen@ind.names
sample.div <- stamppNeisD(gen, pop = FALSE)
plot(nj(sample.div), type = "unrooted", cex = 1)
ts_fst(ts, sample.sets)
## # A tibble: 3 × 3
## x y Fst
## <chr> <chr> <dbl>
## 1 mac ocai 0.0717
## 2 mac hyb 0.0476
## 3 ocai hyb 0.0381
#calculate heterozygosity per sample
mat<-extract.gt(vcfR)
sample.info$sim.het<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
sample.info$sim.pop<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
#write loop to calculate heterozygosity
for (i in 1:ncol(mat)){
sample.info$sim.het[i]<-(sum(mat[,i][!is.na(mat[,i])] == "0|1")+sum(mat[,i][!is.na(mat[,i])] == "1|0"))/sum(!is.na(mat[,i]))
}
sample.info$sim.het<-as.numeric(as.character(sample.info$sim.het))
#plot heterozygosity violin plots
ggplot(sample.info, aes(x=sim.pop, y=sim.het)) +
#geom_violin(trim = FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = 2, alpha=.75,aes(fill=sample.loc))+
theme_classic()+
labs(x="sampling locality",y="heterozygosity")
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
#define each population
mac <- population("mac", time = 1, N = 25000)
ocai <- population("ocai", time = 50, N = 25000, parent = mac)
hyb <- population("hyb", time = 10000, N = 25000, parent = ocai)
#add in the gene flow edge
#add in the gene flow edge
gf <- gene_flow(from = ocai, to = hyb, start = 10001, end = 15000, 0.2)
gf2 <- gene_flow(from = mac, to = hyb, start = 10001, end = 15000, 0.2)
gf3 <- gene_flow(from = hyb, to = mac, start = 10001, end = 15000, 0.2)
gf4 <- gene_flow(from = hyb, to = ocai, start =10001, end = 15000, 0.2)
gf.x <- gene_flow(from = ocai, to = hyb, start = 15001, end = 20000, 0.8)
gf2.x <- gene_flow(from = mac, to = hyb, start = 15001, end = 20000, 0.8)
gf3.x <- gene_flow(from = hyb, to = mac, start = 15001, end = 20000, 0.8)
gf4.x <- gene_flow(from = hyb, to = ocai, start =15001, end = 20000, 0.8)
#compile the model
model <- compile_model(populations = list(mac,ocai,hyb),
gene_flow = list(gf,gf2,gf3,gf4,gf.x,gf2.x,gf3.x,gf4.x),
generation_time = 1,
sim_length = 20000,
path = "~/Downloads/slim.examples/tow7",
overwrite = TRUE, force = TRUE)
#plot the model to make sure you set it up correctly
plot_model(model, sizes = TRUE, proportions = TRUE)
#schedule sampling
present_samples <- schedule_sampling(model, times = 20000, list(mac, 16), list(hyb, 8), list(ocai, 8), strict = TRUE)
present_samples
## # A tibble: 3 × 7
## time pop n y_orig x_orig y x
## <int> <chr> <int> <lgl> <lgl> <lgl> <lgl>
## 1 20000 mac 16 NA NA NA NA
## 2 20000 hyb 8 NA NA NA NA
## 3 20000 ocai 8 NA NA NA NA
#sim the model in msprime
msprime(model, sequence_length = 1e7, recombination_rate = 1e-8,
sampling = present_samples, random_seed = 315)
#load in tree sequence
ts <- ts_load(model)
#add mutations to simulation
ts <- ts_mutate(ts, mutation_rate = 1e-8, random_seed = 3141)
#write out vcf
ts_vcf(ts, path = "~/Downloads/slim.examples/output.7.vcf.gz")
#read in vcf
vcfR <- read.vcfR("~/Downloads/slim.examples/output.7.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 5
## header_line: 6
## variant count: 75827
## column count: 41
##
Meta line 5 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 75827
## Character matrix gt cols: 41
## skip: 0
## nrows: 75827
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant: 75827
## All variants processed
#convert to genlight
gen<-vcfR2genlight(vcfR)
## Warning in vcfR2genlight(vcfR): Found 197 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 197 loci will be omitted from the genlight object.
#
pop(gen)<-gen@ind.names
sample.div <- stamppNeisD(gen, pop = FALSE)
plot(nj(sample.div), type = "unrooted", cex = 1)
ts_fst(ts, sample.sets)
## # A tibble: 3 × 3
## x y Fst
## <chr> <chr> <dbl>
## 1 mac ocai 0.0422
## 2 mac hyb 0.0228
## 3 ocai hyb 0.0156
#calculate heterozygosity per sample
mat<-extract.gt(vcfR)
sample.info$sim.het<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
sample.info$sim.pop<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
#write loop to calculate heterozygosity
for (i in 1:ncol(mat)){
sample.info$sim.het[i]<-(sum(mat[,i][!is.na(mat[,i])] == "0|1")+sum(mat[,i][!is.na(mat[,i])] == "1|0"))/sum(!is.na(mat[,i]))
}
sample.info$sim.het<-as.numeric(as.character(sample.info$sim.het))
#plot heterozygosity violin plots
ggplot(sample.info, aes(x=sim.pop, y=sim.het)) +
#geom_violin(trim = FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = 2, alpha=.75,aes(fill=sample.loc))+
theme_classic()+
labs(x="sampling locality",y="heterozygosity")
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
#define each population
mac <- population("mac", time = 1, N = 25000)
ocai <- population("ocai", time = 50, N = 25000, parent = mac)
hyb <- population("hyb", time = 5000, N = 25000, parent = ocai)
#add in the gene flow edge
#add in the gene flow edge
gf <- gene_flow(from = ocai, to = hyb, start = 5001, end = 15000, 0.2)
gf2 <- gene_flow(from = mac, to = hyb, start = 5001, end = 15000, 0.2)
gf3 <- gene_flow(from = hyb, to = mac, start = 5001, end = 15000, 0.2)
gf4 <- gene_flow(from = hyb, to = ocai, start =5001, end = 15000, 0.2)
gf.x <- gene_flow(from = ocai, to = hyb, start = 15001, end = 20000, 0.8)
gf2.x <- gene_flow(from = mac, to = hyb, start = 15001, end = 20000, 0.8)
gf3.x <- gene_flow(from = hyb, to = mac, start = 15001, end = 20000, 0.8)
gf4.x <- gene_flow(from = hyb, to = ocai, start =15001, end = 20000, 0.8)
#compile the model
model <- compile_model(populations = list(mac,ocai,hyb),
gene_flow = list(gf,gf2,gf3,gf4,gf.x,gf2.x,gf3.x,gf4.x),
generation_time = 1,
sim_length = 20000,
path = "~/Downloads/slim.examples/tow8",
overwrite = TRUE, force = TRUE)
#plot the model to make sure you set it up correctly
plot_model(model, sizes = TRUE, proportions = TRUE)
#schedule sampling
present_samples <- schedule_sampling(model, times = 20000, list(mac, 16), list(hyb, 8), list(ocai, 8), strict = TRUE)
present_samples
## # A tibble: 3 × 7
## time pop n y_orig x_orig y x
## <int> <chr> <int> <lgl> <lgl> <lgl> <lgl>
## 1 20000 mac 16 NA NA NA NA
## 2 20000 hyb 8 NA NA NA NA
## 3 20000 ocai 8 NA NA NA NA
#sim the model in msprime
msprime(model, sequence_length = 1e7, recombination_rate = 1e-8,
sampling = present_samples, random_seed = 315)
#load in tree sequence
ts <- ts_load(model)
#add mutations to simulation
ts <- ts_mutate(ts, mutation_rate = 1e-8, random_seed = 3141)
#write out vcf
ts_vcf(ts, path = "~/Downloads/slim.examples/output.8.vcf.gz")
#read in vcf
vcfR <- read.vcfR("~/Downloads/slim.examples/output.8.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 5
## header_line: 6
## variant count: 78467
## column count: 41
##
Meta line 5 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 78467
## Character matrix gt cols: 41
## skip: 0
## nrows: 78467
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant: 78467
## All variants processed
#convert to genlight
gen<-vcfR2genlight(vcfR)
## Warning in vcfR2genlight(vcfR): Found 235 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 235 loci will be omitted from the genlight object.
#
pop(gen)<-gen@ind.names
sample.div <- stamppNeisD(gen, pop = FALSE)
plot(nj(sample.div), type = "unrooted", cex = 1)
ts_fst(ts, sample.sets)
## # A tibble: 3 × 3
## x y Fst
## <chr> <chr> <dbl>
## 1 mac ocai 0.0409
## 2 mac hyb 0.0196
## 3 ocai hyb 0.0160
#calculate heterozygosity per sample
mat<-extract.gt(vcfR)
sample.info$sim.het<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
sample.info$sim.pop<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
#write loop to calculate heterozygosity
for (i in 1:ncol(mat)){
sample.info$sim.het[i]<-(sum(mat[,i][!is.na(mat[,i])] == "0|1")+sum(mat[,i][!is.na(mat[,i])] == "1|0"))/sum(!is.na(mat[,i]))
}
sample.info$sim.het<-as.numeric(as.character(sample.info$sim.het))
#plot heterozygosity violin plots
ggplot(sample.info, aes(x=sim.pop, y=sim.het)) +
#geom_violin(trim = FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = 2, alpha=.75,aes(fill=sample.loc))+
theme_classic()+
labs(x="sampling locality",y="heterozygosity")
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
#define each population
mac <- population("mac", time = 1, N = 25000)
ocai <- population("ocai", time = 50, N = 25000, parent = mac)
hyb <- population("hyb", time = 5000, N = 25000, parent = ocai)
#add in the gene flow edge
#add in the gene flow edge
gf <- gene_flow(from = ocai, to = hyb, start = 5001, end = 15000, 0.8)
gf2 <- gene_flow(from = mac, to = hyb, start = 5001, end = 15000, 0.8)
gf3 <- gene_flow(from = hyb, to = mac, start = 5001, end = 15000, 0.8)
gf4 <- gene_flow(from = hyb, to = ocai, start =5001, end = 15000, 0.8)
gf.x <- gene_flow(from = ocai, to = hyb, start = 15001, end = 20000, 0.2)
gf2.x <- gene_flow(from = mac, to = hyb, start = 15001, end = 20000, 0.2)
gf3.x <- gene_flow(from = hyb, to = mac, start = 15001, end = 20000, 0.2)
gf4.x <- gene_flow(from = hyb, to = ocai, start =15001, end = 20000, 0.2)
#compile the model
model <- compile_model(populations = list(mac,ocai,hyb),
gene_flow = list(gf,gf2,gf3,gf4,gf.x,gf2.x,gf3.x,gf4.x),
generation_time = 1,
sim_length = 20000,
path = "~/Downloads/slim.examples/tow9",
overwrite = TRUE, force = TRUE)
#plot the model to make sure you set it up correctly
plot_model(model, sizes = TRUE, proportions = TRUE)
#schedule sampling
present_samples <- schedule_sampling(model, times = 20000, list(mac, 16), list(hyb, 8), list(ocai, 8), strict = TRUE)
present_samples
## # A tibble: 3 × 7
## time pop n y_orig x_orig y x
## <int> <chr> <int> <lgl> <lgl> <lgl> <lgl>
## 1 20000 mac 16 NA NA NA NA
## 2 20000 hyb 8 NA NA NA NA
## 3 20000 ocai 8 NA NA NA NA
#sim the model in msprime
msprime(model, sequence_length = 1e7, recombination_rate = 1e-8,
sampling = present_samples, random_seed = 315)
#load in tree sequence
ts <- ts_load(model)
#add mutations to simulation
ts <- ts_mutate(ts, mutation_rate = 1e-8, random_seed = 3141)
#write out vcf
ts_vcf(ts, path = "~/Downloads/slim.examples/output.9.vcf.gz")
#read in vcf
vcfR <- read.vcfR("~/Downloads/slim.examples/output.9.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 5
## header_line: 6
## variant count: 77888
## column count: 41
##
Meta line 5 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 77888
## Character matrix gt cols: 41
## skip: 0
## nrows: 77888
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant: 77888
## All variants processed
#convert to genlight
gen<-vcfR2genlight(vcfR)
## Warning in vcfR2genlight(vcfR): Found 214 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 214 loci will be omitted from the genlight object.
#
pop(gen)<-gen@ind.names
sample.div <- stamppNeisD(gen, pop = FALSE)
plot(nj(sample.div), type = "unrooted", cex = 1)
ts_fst(ts, sample.sets)
## # A tibble: 3 × 3
## x y Fst
## <chr> <chr> <dbl>
## 1 mac ocai 0.0733
## 2 mac hyb 0.0469
## 3 ocai hyb 0.0382
#calculate heterozygosity per sample
mat<-extract.gt(vcfR)
sample.info$sim.het<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
sample.info$sim.pop<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
#write loop to calculate heterozygosity
for (i in 1:ncol(mat)){
sample.info$sim.het[i]<-(sum(mat[,i][!is.na(mat[,i])] == "0|1")+sum(mat[,i][!is.na(mat[,i])] == "1|0"))/sum(!is.na(mat[,i]))
}
sample.info$sim.het<-as.numeric(as.character(sample.info$sim.het))
#plot heterozygosity violin plots
ggplot(sample.info, aes(x=sim.pop, y=sim.het)) +
#geom_violin(trim = FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = 2, alpha=.75,aes(fill=sample.loc))+
theme_classic()+
labs(x="sampling locality",y="heterozygosity")
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
#define each population
mac <- population("mac", time = 1, N = 25000)
ocai <- population("ocai", time = 50, N = 25000, parent = mac)
hyb <- population("hyb", time = 15000, N = 25000, parent = ocai)
#add in the gene flow edge
#add in the gene flow edge
gf.x <- gene_flow(from = ocai, to = hyb, start = 15001, end = 20000, 0.8)
gf2.x <- gene_flow(from = mac, to = hyb, start = 15001, end = 20000, 0.8)
gf3.x <- gene_flow(from = hyb, to = mac, start = 15001, end = 20000, 0.8)
gf4.x <- gene_flow(from = hyb, to = ocai, start =15001, end = 20000, 0.8)
#compile the model
model <- compile_model(populations = list(mac,ocai,hyb),
gene_flow = list(gf.x,gf2.x,gf3.x,gf4.x),
generation_time = 1,
sim_length = 20000,
path = "~/Downloads/slim.examples/tow10",
overwrite = TRUE, force = TRUE)
#plot the model to make sure you set it up correctly
plot_model(model, sizes = TRUE, proportions = TRUE)
#schedule sampling
present_samples <- schedule_sampling(model, times = 20000, list(mac, 16), list(hyb, 8), list(ocai, 8), strict = TRUE)
present_samples
## # A tibble: 3 × 7
## time pop n y_orig x_orig y x
## <int> <chr> <int> <lgl> <lgl> <lgl> <lgl>
## 1 20000 mac 16 NA NA NA NA
## 2 20000 hyb 8 NA NA NA NA
## 3 20000 ocai 8 NA NA NA NA
#sim the model in msprime
msprime(model, sequence_length = 1e7, recombination_rate = 1e-8,
sampling = present_samples, random_seed = 315)
#load in tree sequence
ts <- ts_load(model)
#add mutations to simulation
ts <- ts_mutate(ts, mutation_rate = 1e-8, random_seed = 3141)
#write out vcf
ts_vcf(ts, path = "~/Downloads/slim.examples/output.10.vcf.gz")
#read in vcf
vcfR <- read.vcfR("~/Downloads/slim.examples/output.10.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 5
## header_line: 6
## variant count: 72341
## column count: 41
##
Meta line 5 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 72341
## Character matrix gt cols: 41
## skip: 0
## nrows: 72341
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant: 72341
## All variants processed
#convert to genlight
gen<-vcfR2genlight(vcfR)
## Warning in vcfR2genlight(vcfR): Found 181 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 181 loci will be omitted from the genlight object.
#
pop(gen)<-gen@ind.names
sample.div <- stamppNeisD(gen, pop = FALSE)
plot(nj(sample.div), type = "unrooted", cex = 1)
ts_fst(ts, sample.sets)
## # A tibble: 3 × 3
## x y Fst
## <chr> <chr> <dbl>
## 1 mac ocai 0.0515
## 2 mac hyb 0.0261
## 3 ocai hyb 0.0186
#calculate heterozygosity per sample
mat<-extract.gt(vcfR)
sample.info$sim.het<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
sample.info$sim.pop<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
#write loop to calculate heterozygosity
for (i in 1:ncol(mat)){
sample.info$sim.het[i]<-(sum(mat[,i][!is.na(mat[,i])] == "0|1")+sum(mat[,i][!is.na(mat[,i])] == "1|0"))/sum(!is.na(mat[,i]))
}
sample.info$sim.het<-as.numeric(as.character(sample.info$sim.het))
#plot heterozygosity violin plots
ggplot(sample.info, aes(x=sim.pop, y=sim.het)) +
#geom_violin(trim = FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = 2, alpha=.75,aes(fill=sample.loc))+
theme_classic()+
labs(x="sampling locality",y="heterozygosity")
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
#define each population
mac <- population("mac", time = 1, N = 25000)
ocai <- population("ocai", time = 50, N = 25000, parent = mac)
hyb <- population("hyb", time = 15000, N = 25000, parent = ocai)
#add in the gene flow edge
#add in the gene flow edge
gf.x <- gene_flow(from = ocai, to = hyb, start = 15001, end = 20000, 0.8)
gf2.x <- gene_flow(from = mac, to = hyb, start = 15001, end = 20000, 0.8)
#compile the model
model <- compile_model(populations = list(mac,ocai,hyb),
gene_flow = list(gf.x,gf2.x),
generation_time = 1,
sim_length = 20000,
path = "~/Downloads/slim.examples/tow11",
overwrite = TRUE, force = TRUE)
#plot the model to make sure you set it up correctly
plot_model(model, sizes = TRUE, proportions = TRUE)
#schedule sampling
present_samples <- schedule_sampling(model, times = 20000, list(mac, 16), list(hyb, 8), list(ocai, 8), strict = TRUE)
present_samples
## # A tibble: 3 × 7
## time pop n y_orig x_orig y x
## <int> <chr> <int> <lgl> <lgl> <lgl> <lgl>
## 1 20000 mac 16 NA NA NA NA
## 2 20000 hyb 8 NA NA NA NA
## 3 20000 ocai 8 NA NA NA NA
#sim the model in msprime
msprime(model, sequence_length = 1e7, recombination_rate = 1e-8,
sampling = present_samples, random_seed = 315)
#load in tree sequence
ts <- ts_load(model)
#add mutations to simulation
ts <- ts_mutate(ts, mutation_rate = 1e-8, random_seed = 3141)
#write out vcf
ts_vcf(ts, path = "~/Downloads/slim.examples/output.11.vcf.gz")
#read in vcf
vcfR <- read.vcfR("~/Downloads/slim.examples/output.11.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 5
## header_line: 6
## variant count: 70541
## column count: 41
##
Meta line 5 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 70541
## Character matrix gt cols: 41
## skip: 0
## nrows: 70541
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant: 70541
## All variants processed
#convert to genlight
gen<-vcfR2genlight(vcfR)
## Warning in vcfR2genlight(vcfR): Found 175 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 175 loci will be omitted from the genlight object.
#
pop(gen)<-gen@ind.names
sample.div <- stamppNeisD(gen, pop = FALSE)
plot(nj(sample.div), type = "unrooted", cex = 1)
ts_fst(ts, sample.sets)
## # A tibble: 3 × 3
## x y Fst
## <chr> <chr> <dbl>
## 1 mac ocai 0.167
## 2 mac hyb 0.0691
## 3 ocai hyb 0.0360
#calculate heterozygosity per sample
mat<-extract.gt(vcfR)
sample.info$sim.het<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
sample.info$sim.pop<-c(rep("hyb", times=8),rep("mac", times=16),rep("ocai", times=8))
#write loop to calculate heterozygosity
for (i in 1:ncol(mat)){
sample.info$sim.het[i]<-(sum(mat[,i][!is.na(mat[,i])] == "0|1")+sum(mat[,i][!is.na(mat[,i])] == "1|0"))/sum(!is.na(mat[,i]))
}
sample.info$sim.het<-as.numeric(as.character(sample.info$sim.het))
#plot heterozygosity violin plots
ggplot(sample.info, aes(x=sim.pop, y=sim.het)) +
#geom_violin(trim = FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = 2, alpha=.75,aes(fill=sample.loc))+
theme_classic()+
labs(x="sampling locality",y="heterozygosity")
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.